# Packages this analysis needs; any that are missing are installed before the
# library() calls below run.
list_of_packages <- c("tidyverse", "readxl", "here", "correlation", "DT")

# system.file(package = ) returns "" for a package that is not installed.
# installed.packages() scans every library and its documentation advises
# against using it to test for a named package.
is_installed <- vapply(
  list_of_packages,
  function(pkg) nzchar(system.file(package = pkg)),
  logical(1)
)
new_packages <- list_of_packages[!is_installed]

if (length(new_packages) > 0) {
  install.packages(new_packages)
}
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.1.0 ✓ dplyr 1.0.4
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readxl)
library(here)
## here() starts at /home/francisko/coding/r/correlacoes-gui
library(correlation)
library(DT)
# Directory (relative to the project root found by here) holding the workbooks.
data_path <- here::here("data")

# `pattern` is a regular expression, not a glob: keep only names that end in
# ".xls" or ".xlsx". The previous "*.xls|*.xlsx" matched any name that merely
# contained "xls".
files <- dir(path = data_path, pattern = "\\.xlsx?$")

# Read every workbook into a list of data frames. The names are assigned by
# position, so they rely on the order in which dir() returns the files.
# NOTE(review): confirm that order matches the six names below.
adsorbed_particles <-
  files %>%
  purrr::map(~ readxl::read_excel(file.path(data_path, .))) %>%
  purrr::set_names(
    nm = c(
      "particle_count",
      "macro",
      "micro",
      "wetability",
      "celular_perimeter",
      "ui"
    )
  )
# Tidy the raw particle counts: lower-case the identifier columns, recode the
# site labels as numbers, coerce the three size-class columns to numeric and
# add their sum as `total`.
adsorbed_particles$particle_count <-
  adsorbed_particles[["particle_count"]] %>%
  dplyr::rename(sp = Sp, face = Face, frag = Frag, site = Site) %>%
  dplyr::mutate(
    # Labels other than the five listed here become NA (no fallback branch).
    site = dplyr::case_when(
      site == "BR" ~ 1,
      site == "CA" ~ 2,
      site == "VZ" ~ 3,
      site == "CN" ~ 4,
      site == "REF" ~ 5
    ),
    # Entries that cannot be parsed as numbers become NA, with a warning.
    dplyr::across(c(`<2.5`, `2.5-10`, `10-100`), as.numeric),
    total = `<2.5` + `2.5-10` + `10-100`
  )
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
# Average the particle counts within each site/sp/Rep/face combination and
# store the ungrouped result as a new list element. No na.rm is used, so a
# group containing an NA gets an NA mean.
adsorbed_particles$particle_count_micro <-
  adsorbed_particles[["particle_count"]] %>%
  dplyr::group_by(site, sp, Rep, face) %>%
  dplyr::summarise(
    dplyr::across(c(`<2.5`, `2.5-10`, `10-100`, total), mean)
  ) %>%
  dplyr::ungroup()
## `summarise()` has grouped output by 'site', 'sp', 'Rep'. You can override using the `.groups` argument.
# micro: rename `rep` to `ind` and drop the Sq column.
adsorbed_particles$micro <-
  adsorbed_particles[["micro"]] %>%
  dplyr::rename(ind = rep) %>%
  dplyr::select(-Sq)

# macro: drop the Rq column.
adsorbed_particles$macro <-
  adsorbed_particles[["macro"]] %>%
  dplyr::select(-Rq)
# Left-join, in list order, every element whose name contains "micro"
# (join keys are picked automatically from the shared column names), then
# rename `Rep` to `rep`.
micro_combined <-
  adsorbed_particles[
    grepl("micro", names(adsorbed_particles), fixed = TRUE)
  ] %>%
  purrr::reduce(dplyr::left_join) %>%
  dplyr::rename(rep = Rep)
## Joining, by = c("sp", "face")
I forgot to change sp, site, face and rep to factors so I’ll do it now:
# Identifier columns shared by both combined tables.
common_vars <- c("sp", "site", "face", "rep")

# Convert the identifier columns of micro_combined to factors.
micro_combined <-
  micro_combined %>%
  dplyr::mutate(
    dplyr::across(dplyr::all_of(common_vars), forcats::as_factor)
  )
# Left-join, in list order, every element whose name does NOT contain "micro"
# (join keys are picked automatically from the shared column names), then
# rename `Rep` to `rep`.
all_but_micro_combined <-
  adsorbed_particles[
    !grepl("micro", names(adsorbed_particles), fixed = TRUE)
  ] %>%
  purrr::reduce(dplyr::left_join) %>%
  dplyr::rename(rep = Rep)
## Joining, by = c("sp", "face", "frag")
## Joining, by = c("sp", "face", "frag", "ind")
## Joining, by = c("sp", "face", "frag", "ind")
## Joining, by = c("sp", "face", "frag", "ind")
I forgot to change sp, site, face and rep to factors so I’ll do it now:
# Convert the identifier columns listed in common_vars to factors.
all_but_micro_combined <-
  all_but_micro_combined %>%
  dplyr::mutate(
    dplyr::across(dplyr::all_of(common_vars), forcats::as_factor)
  )
# Interactive table of micro_combined with the Buttons extension enabled
# (copy, csv, excel, pdf, print) and page-length choices 10/25/50/-1, the
# last one labelled "All".
DT::datatable(
  micro_combined,
  extensions = "Buttons",
  options = list(
    dom = "Blfrtip",
    buttons = c("copy", "csv", "excel", "pdf", "print"),
    lengthMenu = list(
      c(10, 25, 50, -1),
      c(10, 25, 50, "All")
    )
  )
)
# Interactive table of all_but_micro_combined with the Buttons extension
# enabled (copy, csv, excel, pdf, print) and page-length choices 10/25/50/-1,
# the last one labelled "All".
DT::datatable(
  all_but_micro_combined,
  extensions = "Buttons",
  options = list(
    dom = "Blfrtip",
    buttons = c("copy", "csv", "excel", "pdf", "print"),
    lengthMenu = list(
      c(10, 25, 50, -1),
      c(10, 25, 50, "All")
    )
  )
)
Since there’s a lot of repetition in these analyses, I defined functions to make the code easier to maintain.
The following function calculates the Pearson correlation between `leaf_char` and `part_size` for a given dataset (`dat`).
# Correlation between the group means of a leaf characteristic and a
# particle-size count.
#
# dat:       data frame containing sp, site, face, rep and the two columns
#            named below.
# leaf_char: unquoted name of the leaf-characteristic column.
# part_size: unquoted name of the particle-count column.
#
# Both columns are averaged within each site/sp/face/rep combination (no
# na.rm, so a group containing an NA gets an NA mean) and the ungrouped
# summary is handed to correlation(). The summary columns are left unnamed, so
# they appear as `mean(<column>)` in the result.
calculates_correlation <- function(dat, leaf_char, part_size) {
  id_cols <- c("sp", "site", "face", "rep")

  needed <- dplyr::select(
    dat,
    dplyr::all_of(id_cols),
    {{ leaf_char }},
    {{ part_size }}
  )
  by_leaf <- dplyr::group_by(needed, site, sp, face, rep)
  group_means <- dplyr::summarise(
    by_leaf,
    mean({{ leaf_char }}),
    mean({{ part_size }})
  )

  correlation::correlation(dplyr::ungroup(group_means))
}
# Scatterplot of the group means of a leaf characteristic (x) against a
# particle-size count (y), with a fitted straight line.
#
# dat:       data frame containing sp, site, face, rep and the two columns
#            named below.
# leaf_char: unquoted name of the leaf-characteristic column (x axis).
# part_size: unquoted name of the particle-count column (y axis).
#
# Returns a ggplot object, so callers can add layers or labels with `+`.
# Means are computed without na.rm, so a group containing an NA yields an NA
# point, which ggplot drops with a warning.
plots_scatterplot <- function(dat, leaf_char, part_size) {
  common_vars <- c("sp", "site", "face", "rep")

  dat %>%
    dplyr::select(
      dplyr::all_of(common_vars),
      {{ leaf_char }},
      {{ part_size }}
    ) %>%
    dplyr::group_by(site, sp, face, rep) %>%
    dplyr::summarise(
      mean_char = mean({{ leaf_char }}),
      mean_part = mean({{ part_size }})
    ) %>%
    # summarise() leaves the result grouped; drop that before plotting, as
    # calculates_correlation() does before correlating.
    dplyr::ungroup() %>%
    ggplot2::ggplot(ggplot2::aes(x = mean_char, y = mean_part)) +
    ggplot2::geom_point(alpha = 0.7) +
    # FALSE rather than F: F is an ordinary variable that can be reassigned.
    ggplot2::geom_smooth(method = "lm", se = FALSE)
}
# Correlation of mean Sa with mean `<2.5` particle count (micro_combined).
calculates_correlation(
  dat = micro_combined,
  leaf_char = Sa,
  part_size = `<2.5`
)
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## # Correlation table (pearson-method)
##
## Parameter1 | Parameter2 | r | 95% CI | t(52) | p
## --------------------------------------------------------------------
## mean(Sa) | mean(`<2.5`) | -0.36 | [-0.57, -0.10] | -2.77 | 0.008**
##
## p-value adjustment method: Holm (1979)
## Observations: 54
# Scatterplot of mean Sa against mean `<2.5` particle count, with axis labels.
plots_scatterplot(
  dat = micro_combined,
  leaf_char = Sa,
  part_size = `<2.5`
) +
  ggplot2::labs(x = "Sa", y = "Number of adsorbed particles (<2.5)")
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing missing values (geom_point).
# Correlation of mean Sa with mean `2.5-10` particle count (micro_combined).
calculates_correlation(
  dat = micro_combined,
  leaf_char = Sa,
  part_size = `2.5-10`
)
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## # Correlation table (pearson-method)
##
## Parameter1 | Parameter2 | r | 95% CI | t(52) | p
## ------------------------------------------------------------------------
## mean(Sa) | mean(`2.5-10`) | -0.44 | [-0.63, -0.19] | -3.50 | < .001***
##
## p-value adjustment method: Holm (1979)
## Observations: 54
# Scatterplot of mean Sa against mean `2.5-10` particle count, with axis
# labels.
plots_scatterplot(
  dat = micro_combined,
  leaf_char = Sa,
  part_size = `2.5-10`
) +
  ggplot2::labs(y = "Number of adsorbed particles (2.5 - 10)", x = "Sa")
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing missing values (geom_point).
# Correlation of mean Sa with mean `10-100` particle count (micro_combined).
calculates_correlation(
  dat = micro_combined,
  leaf_char = Sa,
  part_size = `10-100`
)
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## # Correlation table (pearson-method)
##
## Parameter1 | Parameter2 | r | 95% CI | t(55) | p
## ---------------------------------------------------------------------
## mean(Sa) | mean(`10-100`) | -0.33 | [-0.55, -0.08] | -2.61 | 0.012*
##
## p-value adjustment method: Holm (1979)
## Observations: 57
# Scatterplot of mean Sa against mean `10-100` particle count, with axis
# labels. The size range is written "(10 - 100)" so the y label matches the
# spacing used by the other 10-100 plots in this file.
plots_scatterplot(micro_combined, Sa, `10-100`) +
  labs(
    x = "Sa",
    y = "Number of adsorbed particles (10 - 100)"
  )
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing missing values (geom_point).
# Correlation of mean Sa with mean total particle count (micro_combined).
calculates_correlation(
  dat = micro_combined,
  leaf_char = Sa,
  part_size = total
)
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## # Correlation table (pearson-method)
##
## Parameter1 | Parameter2 | r | 95% CI | t(52) | p
## -------------------------------------------------------------------
## mean(Sa) | mean(total) | -0.42 | [-0.62, -0.17] | -3.30 | 0.002**
##
## p-value adjustment method: Holm (1979)
## Observations: 54
# Scatterplot of mean Sa against mean total particle count, with axis labels.
# "adsorbed" is lower-case so the y label matches the other total-count plots
# in this file.
plots_scatterplot(micro_combined, Sa, total) +
  labs(
    x = "Sa",
    y = "Total number of adsorbed particles"
  )
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing missing values (geom_point).
# Correlation of mean Ra with mean `<2.5` particle count
# (all_but_micro_combined).
calculates_correlation(
  dat = all_but_micro_combined,
  leaf_char = Ra,
  part_size = `<2.5`
)
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## # Correlation table (pearson-method)
##
## Parameter1 | Parameter2 | r | 95% CI | t(52) | p
## ----------------------------------------------------------------------
## mean(Ra) | mean(`<2.5`) | -0.79 | [-0.88, -0.67] | -9.44 | < .001***
##
## p-value adjustment method: Holm (1979)
## Observations: 54
# Scatterplot of mean Ra against mean `<2.5` particle count, with axis labels.
plots_scatterplot(
  dat = all_but_micro_combined,
  leaf_char = Ra,
  part_size = `<2.5`
) +
  ggplot2::labs(x = "Ra", y = "Number of adsorbed particles (<2.5)")
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing missing values (geom_point).
# Correlation of mean Ra with mean `2.5-10` particle count
# (all_but_micro_combined).
calculates_correlation(
  dat = all_but_micro_combined,
  leaf_char = Ra,
  part_size = `2.5-10`
)
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## # Correlation table (pearson-method)
##
## Parameter1 | Parameter2 | r | 95% CI | t(52) | p
## -------------------------------------------------------------------------
## mean(Ra) | mean(`2.5-10`) | -0.82 | [-0.89, -0.70] | -10.26 | < .001***
##
## p-value adjustment method: Holm (1979)
## Observations: 54
# Scatterplot of mean Ra against mean `2.5-10` particle count, with axis
# labels.
plots_scatterplot(
  dat = all_but_micro_combined,
  leaf_char = Ra,
  part_size = `2.5-10`
) +
  ggplot2::labs(x = "Ra", y = "Number of adsorbed particles (2.5 - 10)")
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing missing values (geom_point).
# Correlation of mean Ra with mean `10-100` particle count
# (all_but_micro_combined).
calculates_correlation(
  dat = all_but_micro_combined,
  leaf_char = Ra,
  part_size = `10-100`
)
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## # Correlation table (pearson-method)
##
## Parameter1 | Parameter2 | r | 95% CI | t(55) | p
## ------------------------------------------------------------------------
## mean(Ra) | mean(`10-100`) | -0.62 | [-0.76, -0.43] | -5.84 | < .001***
##
## p-value adjustment method: Holm (1979)
## Observations: 57
# Scatterplot of mean Ra against mean `10-100` particle count, with axis
# labels.
plots_scatterplot(
  dat = all_but_micro_combined,
  leaf_char = Ra,
  part_size = `10-100`
) +
  ggplot2::labs(x = "Ra", y = "Number of adsorbed particles (10 - 100)")
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing missing values (geom_point).
# Correlation of mean Ra with mean total particle count
# (all_but_micro_combined).
calculates_correlation(
  dat = all_but_micro_combined,
  leaf_char = Ra,
  part_size = total
)
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## # Correlation table (pearson-method)
##
## Parameter1 | Parameter2 | r | 95% CI | t(52) | p
## ----------------------------------------------------------------------
## mean(Ra) | mean(total) | -0.83 | [-0.90, -0.72] | -10.54 | < .001***
##
## p-value adjustment method: Holm (1979)
## Observations: 54
# Scatterplot of mean Ra against mean total particle count, with axis labels.
plots_scatterplot(
  dat = all_but_micro_combined,
  leaf_char = Ra,
  part_size = total
) +
  ggplot2::labs(x = "Ra", y = "Total number of adsorbed particles")
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing missing values (geom_point).
# Correlation of mean Angle with mean `<2.5` particle count
# (all_but_micro_combined).
calculates_correlation(
  dat = all_but_micro_combined,
  leaf_char = Angle,
  part_size = `<2.5`
)
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## # Correlation table (pearson-method)
##
## Parameter1 | Parameter2 | r | 95% CI | t(52) | p
## ---------------------------------------------------------------------
## mean(Angle) | mean(`<2.5`) | -0.37 | [-0.58, -0.11] | -2.87 | 0.006**
##
## p-value adjustment method: Holm (1979)
## Observations: 54
# Scatterplot of mean Angle against mean `<2.5` particle count, with axis
# labels.
plots_scatterplot(
  dat = all_but_micro_combined,
  leaf_char = Angle,
  part_size = `<2.5`
) +
  ggplot2::labs(x = "Angle", y = "Number of adsorbed particles (<2.5)")
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing missing values (geom_point).
# Correlation of mean Angle with mean `2.5-10` particle count
# (all_but_micro_combined).
calculates_correlation(
  dat = all_but_micro_combined,
  leaf_char = Angle,
  part_size = `2.5-10`
)
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## # Correlation table (pearson-method)
##
## Parameter1 | Parameter2 | r | 95% CI | t(52) | p
## -------------------------------------------------------------------------
## mean(Angle) | mean(`2.5-10`) | -0.46 | [-0.65, -0.22] | -3.71 | < .001***
##
## p-value adjustment method: Holm (1979)
## Observations: 54
# Scatterplot of mean Angle against mean `2.5-10` particle count, with axis
# labels.
plots_scatterplot(
  dat = all_but_micro_combined,
  leaf_char = Angle,
  part_size = `2.5-10`
) +
  ggplot2::labs(x = "Angle", y = "Number of adsorbed particles (2.5 - 10)")
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing missing values (geom_point).
# Correlation of mean Angle with mean `10-100` particle count
# (all_but_micro_combined).
calculates_correlation(
  dat = all_but_micro_combined,
  leaf_char = Angle,
  part_size = `10-100`
)
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## # Correlation table (pearson-method)
##
## Parameter1 | Parameter2 | r | 95% CI | t(55) | p
## ----------------------------------------------------------------------
## mean(Angle) | mean(`10-100`) | -0.33 | [-0.55, -0.08] | -2.61 | 0.012*
##
## p-value adjustment method: Holm (1979)
## Observations: 57
# Scatterplot of mean Angle against mean `10-100` particle count, with axis
# labels.
plots_scatterplot(
  dat = all_but_micro_combined,
  leaf_char = Angle,
  part_size = `10-100`
) +
  ggplot2::labs(x = "Angle", y = "Number of adsorbed particles (10 - 100)")
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing missing values (geom_point).
# Correlation of mean Angle with mean total particle count
# (all_but_micro_combined).
calculates_correlation(
  dat = all_but_micro_combined,
  leaf_char = Angle,
  part_size = total
)
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## # Correlation table (pearson-method)
##
## Parameter1 | Parameter2 | r | 95% CI | t(52) | p
## --------------------------------------------------------------------
## mean(Angle) | mean(total) | -0.43 | [-0.63, -0.19] | -3.46 | 0.001**
##
## p-value adjustment method: Holm (1979)
## Observations: 54
# Scatterplot of mean Angle against mean total particle count, with axis
# labels.
plots_scatterplot(
  dat = all_but_micro_combined,
  leaf_char = Angle,
  part_size = total
) +
  ggplot2::labs(x = "Angle", y = "Total number of adsorbed particles")
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing missing values (geom_point).
# Correlation of mean Perimeter with mean `<2.5` particle count
# (all_but_micro_combined).
calculates_correlation(
  dat = all_but_micro_combined,
  leaf_char = Perimeter,
  part_size = `<2.5`
)
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## # Correlation table (pearson-method)
##
## Parameter1 | Parameter2 | r | 95% CI | t(52) | p
## ------------------------------------------------------------------------
## mean(Perimeter) | mean(`<2.5`) | -0.30 | [-0.52, -0.03] | -2.26 | 0.028*
##
## p-value adjustment method: Holm (1979)
## Observations: 54
# Scatterplot of mean Perimeter against mean `<2.5` particle count, with axis
# labels.
plots_scatterplot(
  dat = all_but_micro_combined,
  leaf_char = Perimeter,
  part_size = `<2.5`
) +
  ggplot2::labs(x = "Perimeter", y = "Number of adsorbed particles (<2.5)")
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing missing values (geom_point).
# Correlation of mean Perimeter with mean `2.5-10` particle count
# (all_but_micro_combined).
calculates_correlation(
  dat = all_but_micro_combined,
  leaf_char = Perimeter,
  part_size = `2.5-10`
)
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## # Correlation table (pearson-method)
##
## Parameter1 | Parameter2 | r | 95% CI | t(52) | p
## ---------------------------------------------------------------------------
## mean(Perimeter) | mean(`2.5-10`) | -0.38 | [-0.59, -0.13] | -2.99 | 0.004**
##
## p-value adjustment method: Holm (1979)
## Observations: 54
# Scatterplot of mean Perimeter against mean `2.5-10` particle count, with
# axis labels.
plots_scatterplot(
  dat = all_but_micro_combined,
  leaf_char = Perimeter,
  part_size = `2.5-10`
) +
  ggplot2::labs(
    x = "Perimeter",
    y = "Number of adsorbed particles (2.5 - 10)"
  )
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing missing values (geom_point).
# Correlation of mean Perimeter with mean `10-100` particle count
# (all_but_micro_combined).
calculates_correlation(
  dat = all_but_micro_combined,
  leaf_char = Perimeter,
  part_size = `10-100`
)
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## # Correlation table (pearson-method)
##
## Parameter1 | Parameter2 | r | 95% CI | t(55) | p
## --------------------------------------------------------------------------
## mean(Perimeter) | mean(`10-100`) | -0.29 | [-0.51, -0.03] | -2.23 | 0.030*
##
## p-value adjustment method: Holm (1979)
## Observations: 57
# Scatterplot of mean Perimeter against mean `10-100` particle count, with
# axis labels.
plots_scatterplot(
  dat = all_but_micro_combined,
  leaf_char = Perimeter,
  part_size = `10-100`
) +
  ggplot2::labs(
    x = "Perimeter",
    y = "Number of adsorbed particles (10 - 100)"
  )
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing missing values (geom_point).
# Correlation of mean Perimeter with mean total particle count
# (all_but_micro_combined).
calculates_correlation(
  dat = all_but_micro_combined,
  leaf_char = Perimeter,
  part_size = total
)
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## # Correlation table (pearson-method)
##
## Parameter1 | Parameter2 | r | 95% CI | t(52) | p
## ------------------------------------------------------------------------
## mean(Perimeter) | mean(total) | -0.36 | [-0.57, -0.10] | -2.77 | 0.008**
##
## p-value adjustment method: Holm (1979)
## Observations: 54
# Scatterplot of mean Perimeter against mean total particle count, with axis
# labels.
plots_scatterplot(
  dat = all_but_micro_combined,
  leaf_char = Perimeter,
  part_size = total
) +
  ggplot2::labs(x = "Perimeter", y = "Total number of adsorbed particles")
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing missing values (geom_point).
# Correlation of mean UI with mean `<2.5` particle count
# (all_but_micro_combined).
calculates_correlation(
  dat = all_but_micro_combined,
  leaf_char = UI,
  part_size = `<2.5`
)
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## # Correlation table (pearson-method)
##
## Parameter1 | Parameter2 | r | 95% CI | t(52) | p
## ----------------------------------------------------------------------
## mean(UI) | mean(`<2.5`) | -0.44 | [-0.63, -0.19] | -3.49 | < .001***
##
## p-value adjustment method: Holm (1979)
## Observations: 54
# Scatterplot of mean UI against mean `<2.5` particle count, with axis labels.
plots_scatterplot(
  dat = all_but_micro_combined,
  leaf_char = UI,
  part_size = `<2.5`
) +
  ggplot2::labs(x = "UI", y = "Number of adsorbed particles (<2.5)")
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing missing values (geom_point).
# Correlation of mean UI with mean `2.5-10` particle count
# (all_but_micro_combined).
calculates_correlation(
  dat = all_but_micro_combined,
  leaf_char = UI,
  part_size = `2.5-10`
)
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## # Correlation table (pearson-method)
##
## Parameter1 | Parameter2 | r | 95% CI | t(52) | p
## ------------------------------------------------------------------------
## mean(UI) | mean(`2.5-10`) | -0.52 | [-0.69, -0.29] | -4.40 | < .001***
##
## p-value adjustment method: Holm (1979)
## Observations: 54
# Scatterplot of mean UI against mean `2.5-10` particle count, with axis
# labels.
plots_scatterplot(
  dat = all_but_micro_combined,
  leaf_char = UI,
  part_size = `2.5-10`
) +
  ggplot2::labs(x = "UI", y = "Number of adsorbed particles (2.5 - 10)")
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing missing values (geom_point).
# Correlation of mean UI with mean `10-100` particle count
# (all_but_micro_combined).
calculates_correlation(
  dat = all_but_micro_combined,
  leaf_char = UI,
  part_size = `10-100`
)
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## # Correlation table (pearson-method)
##
## Parameter1 | Parameter2 | r | 95% CI | t(55) | p
## ----------------------------------------------------------------------
## mean(UI) | mean(`10-100`) | -0.38 | [-0.58, -0.13] | -3.03 | 0.004**
##
## p-value adjustment method: Holm (1979)
## Observations: 57
# Scatterplot of mean UI against mean `10-100` particle count, with axis
# labels.
plots_scatterplot(
  dat = all_but_micro_combined,
  leaf_char = UI,
  part_size = `10-100`
) +
  ggplot2::labs(x = "UI", y = "Number of adsorbed particles (10 - 100)")
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing missing values (geom_point).
# Correlation of mean UI with mean total particle count
# (all_but_micro_combined).
calculates_correlation(
  dat = all_but_micro_combined,
  leaf_char = UI,
  part_size = total
)
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## # Correlation table (pearson-method)
##
## Parameter1 | Parameter2 | r | 95% CI | t(52) | p
## ---------------------------------------------------------------------
## mean(UI) | mean(total) | -0.50 | [-0.68, -0.27] | -4.14 | < .001***
##
## p-value adjustment method: Holm (1979)
## Observations: 54
# Scatterplot of mean UI against mean total particle count, with axis labels.
plots_scatterplot(
  dat = all_but_micro_combined,
  leaf_char = UI,
  part_size = total
) +
  ggplot2::labs(x = "UI", y = "Total number of adsorbed particles")
## `summarise()` has grouped output by 'site', 'sp', 'face'. You can override using the `.groups` argument.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing missing values (geom_point).